#! /usr/local/bin/R
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
message("Log file for code executed at\n")
message(format(Sys.time(), "%a %b %d %X %Y"))
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
library(magrittr)
library(glue);
library(lubridate)
library(stringr)
library(haven);
library(roll)
library(data.table);
library(statar)
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 1. Import and add ISO Country names to currencies:
dt_fx <- fread("../task_data/output/FX_daily_long.csv")
dt_fx <- dt_fx[, .(date= ymd(date), ticker, base, foreign, prc=as.numeric(value))]


dt_ccodes <- read_dta("./input/country-codes.dta") %>% data.table

dt_fx <- merge(dt_fx, dt_ccodes[, .(base=ISO4217, base_country=ISO3166)], by = c("base"), all.x=T)
dt_fx <- merge(dt_fx, dt_ccodes[, .(foreign=ISO4217, foreign_country=ISO3166)], by = c("foreign"), all.x=T)
dt_fx[base=="TWD",base_country:="TWN"]
dt_fx[foreign=="TWD",foreign_country:="TWN"]
dt_fx[base=="HKD",base_country:="HKG"]
dt_fx[foreign=="HKD",foreign_country:="HKG"]
dt_fx[base=="EUR",base_country:="EUR"]
dt_fx[foreign=="EUR",foreign_country:="EUR"]

dt_fx <- dt_fx[wday(date) %in% c(2,3,4,5,6) ,]

dt_fx[]
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 2. Constructing Base Factors (as in Lustig/Richmond(2018)), but here
setorder(dt_fx, ticker, date)
dt_fx[, datey := year(date) ]
dt_fx[, log_prc := log(prc)]
dt_fx[, d1_prc  := log_prc - tlag(log_prc, n=1L, time=date), by = .(ticker) ]

dt_fx[, range_euro := 0 ]
for (c_euro in c("ATS", "BEF", "FIM", "FRF", "DEM", "GRD", 
                 "IEP", "ITL", "NLG", "PTE", "ESP")){
  dt_fx[ base == c_euro | foreign == c_euro, range_euro := 1 ]
}

## some statistics:

# Fix pre euro currencies
dt_fx <- dt_fx[ datey > 1999 & range_euro == 1 , prc := NA ]
dt_fx <- dt_fx[ datey > 1999 & range_euro == 1 , log_prc := NA ]
dt_fx <- dt_fx[ datey > 1999 & range_euro == 1 , d1_prc := NA ]

# Estimate the base factor (and foreign)
dt_fx[, d1_base_factor_pre     := mean(d1_prc, na.rm=T), by = .(base, date) ]  # calculated for all countries
dt_fx[, d1_foreign_factor_pre  := mean(d1_prc, na.rm=T), by = .(foreign, date) ]  # calculated for all countries

dt_fx[, d1_prc_adj := d1_prc]
dt_fx[ range_euro == 1 , d1_prc_adj := NA]
dt_fx[, d1_base_factor_post     := mean(d1_prc_adj, na.rm=T), by = .(base, date) ]  # calculated for all countries - euros
dt_fx[, d1_foreign_factor_post  := mean(d1_prc_adj, na.rm=T), by = .(foreign, date) ] 

dt_fx[is.nan(d1_base_factor_pre),  d1_base_factor_pre := NA]
dt_fx[is.nan(d1_base_factor_post), d1_base_factor_post := NA]
dt_fx[is.nan(d1_foreign_factor_pre),  d1_foreign_factor_pre := NA]
dt_fx[is.nan(d1_foreign_factor_post), d1_foreign_factor_post := NA]

dt_fx <- dt_fx[ !is.na(base_country)]
dt_fx <- dt_fx[ !is.na(foreign_country)]

# From now on remove euro from analysis as in LR */
dt_fx <- dt_fx[ base != "EUR"]
dt_fx <- dt_fx[ foreign != "EUR"]
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 3. ROLLING REGRESSIONS
dt_fx_reg <- copy(dt_fx)

# STD DEVIATION OF FACTORS
dt_single <- dt_fx_reg[, .(date, base_country, d1_base_factor_pre, d1_base_factor_post) ] %>% unique
setorder(dt_single, base_country, date)
dt_single[, sd_d1_base_pre  := roll_sd(d1_base_factor_pre,  width=1800L, min_obs=100L), 
          by = .(base_country) ]
dt_single[, sd_d1_base_post := roll_sd(d1_base_factor_post, width=1800L, min_obs=100L), 
          by = .(base_country) ]

dt_single[is.nan(sd_d1_base_pre), sd_d1_base_pre := NA]
dt_single[is.nan(sd_d1_base_post), sd_d1_base_post := NA]
dt_single[]

# COVARIANCE OF FACTORS and Std. Dev of LHS
setorder(dt_fx_reg, base_country, foreign_country, date)
dt_fx_reg[, cov_base_foreign_pre  := roll_cov(d1_prc, d1_base_factor_pre, width=1800L, min_obs=100L),
          by = .(base_country, foreign_country)]
dt_fx_reg[, cov_base_foreign_post := roll_cov(d1_prc, d1_base_factor_post, width=1800L, min_obs=100L),
          by = .(base_country, foreign_country)]

dt_fx_reg[, sd_d1_prc := roll_sd(d1_prc, width=1800L, min_obs=100L), by = .(base_country, foreign_country)]	

dt_fx_reg <- dt_fx_reg[ datey > 1999 & range_euro == 1 , sd_d1_prc := NA ] # Don't calculate for pre-euro currencies after 1990!
dt_fx_reg <- dt_fx_reg[ is.na(d1_prc) , sd_d1_prc := NA ] # Don't calculate if current FX is missing!

# Std. dev of difference in base and foreign averages
dt_fx_reg[, sd_d1_base_foreign_pre  := roll_sd(d1_base_factor_pre-d1_foreign_factor_pre, width=1800L, min_obs=100L), 
          by = .(base_country, foreign_country)] 
dt_fx_reg[, sd_d1_base_foreign_post := roll_sd(d1_base_factor_post-d1_foreign_factor_post, width=1800L, min_obs=100L), 
          by = .(base_country, foreign_country)] 
# Covariance  
dt_fx_reg[, cov_bases_pre := roll_cov(d1_base_factor_pre, d1_foreign_factor_pre, width=1800L, min_obs=100L),
          by = .(base_country, foreign_country)] ## Covariance of base factors
dt_fx_reg[, cov_bases_post := roll_cov(d1_base_factor_post, d1_foreign_factor_post, width=1800L, min_obs=100L),
          by = .(base_country, foreign_country)] ## Covariance of base factors

dt_fx_reg[is.nan(cov_base_foreign_pre), cov_base_foreign_pre := NA]
dt_fx_reg[is.nan(cov_base_foreign_post), cov_base_foreign_post := NA]
dt_fx_reg[is.nan(sd_d1_prc), sd_d1_prc := NA]
dt_fx_reg[]

# Now merge it back
rm(dt_fx)
dt_fx_reg <- merge(dt_fx_reg, dt_single[, .(date, base_country, sd_d1_base_pre, sd_d1_base_post)], 
                   all.x = T, by = c("date", "base_country"))
dt_fx_reg <- merge(dt_fx_reg, dt_single[, .(date, foreign_country=base_country, sd_d1_foreign_pre=sd_d1_base_pre, sd_d1_foreign_post=sd_d1_base_post)], 
                   all.x = T, by = c("date", "foreign_country"))
dt_fx_reg[]

dt_fx_reg <- dt_fx_reg[ datey > 1999 & range_euro == 1 , sd_d1_base_pre := NA ] # Don't calculate for pre-euro currencies after 1990!
dt_fx_reg <- dt_fx_reg[ datey > 1999 & range_euro == 1 , sd_d1_base_post := NA ] # Don't calculate for pre-euro currencies after 1990!
dt_fx_reg <- dt_fx_reg[ datey > 1999 & range_euro == 1 , cov_base_foreign_pre := NA ] # Don't calculate for pre-euro currencies after 1990!
dt_fx_reg <- dt_fx_reg[ datey > 1999 & range_euro == 1 , cov_base_foreign_post := NA ] # Don't calculate for pre-euro currencies after 1990!

dt_fx_reg <- dt_fx_reg[ is.na(d1_prc) , sd_d1_base_pre := NA ] # Don't calculate if current FX is missing!
dt_fx_reg <- dt_fx_reg[ is.na(d1_prc) , sd_d1_base_post := NA ] # Don't calculate if current FX is missing!
dt_fx_reg <- dt_fx_reg[ is.na(d1_prc) , cov_base_foreign_pre := NA ] # Don't calculate if current FX is missing!
dt_fx_reg <- dt_fx_reg[ is.na(d1_prc) , cov_base_foreign_post := NA ] # Don't calculate if current FX is missing!

# we can estimate betas and R2 directly from covariances and variances
dt_fx_reg[, beta_pre  :=  cov_base_foreign_pre  / sd_d1_base_pre^2 ]
dt_fx_reg[, beta_post :=  cov_base_foreign_post / sd_d1_base_post^2 ]
dt_fx_reg[, r2_pre    :=  cov_base_foreign_pre^2  / (sd_d1_base_pre * sd_d1_prc)^2 ]
dt_fx_reg[, r2_post   :=  cov_base_foreign_post^2 / (sd_d1_base_post * sd_d1_prc)^2 ]

# new measure I invented
dt_fx_reg[, var_adj_pre  := sd_d1_prc^2 / (sd_d1_base_pre^2 + sd_d1_foreign_pre^2) ]
dt_fx_reg[, var_adj_post := sd_d1_prc^2 / (sd_d1_base_post^2 + sd_d1_foreign_post^2) ]

# MEASURE AS IN COCHRANE LONGSTAFF SANTA CLARA
dt_fx_reg[, unshared_cls1_pre  := sd_d1_prc^2 / sd_d1_base_foreign_pre^2 ]
dt_fx_reg[, unshared_cls1_post := sd_d1_prc^2 / sd_d1_base_foreign_post^2 ]
# Different way of getting at it...
dt_fx_reg[, unshared_cls2_pre  := sd_d1_prc^2 / (sd_d1_base_pre^2 + sd_d1_foreign_pre^2 - 2*cov_bases_pre) ]
dt_fx_reg[, unshared_cls2_post := sd_d1_prc^2 / (sd_d1_base_post^2 + sd_d1_foreign_post^2 - 2*cov_bases_post) ]

dt_fx_reg[]
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
## Export
dt_export <- dt_fx_reg[, .(date, base, foreign, base_country, foreign_country, d1_prc, log_prc, fx_vol = sd_d1_prc,
                           beta_pre, beta_post, r2_pre, r2_post, var_adj_pre, var_adj_post, cov_base_foreign_pre, sd_d1_base_pre,
                           unshared_cls1_pre, unshared_cls1_post, unshared_cls2_pre, unshared_cls2_post)]
dt_export[, datey := year(date) ]

dt_export[, beta          := fifelse(datey<=1999, beta_pre, beta_post) ]
dt_export[, R2_y          := fifelse(datey<=1999, r2_pre, r2_post) ]
dt_export[, var_adj       := fifelse(datey<=1999, var_adj_pre, var_adj_post) ]
dt_export[, unshared_cls1 := fifelse(datey<=1999, unshared_cls1_pre, unshared_cls1_post) ]
dt_export[, unshared_cls2 := fifelse(datey<=1999, unshared_cls2_pre, unshared_cls2_post) ]
dt_export[, last_log_prc := tail(log_prc, n=1, na.rm=T, .SD), by = .(datey, base, foreign, base_country, foreign_country) ]

dt_export <- dt_export[, lapply(.SD, function(x) mean(x, na.rm=T)), 
                       .SDcols = c("d1_prc","log_prc", "fx_vol", "beta", "R2_y", "var_adj", "unshared_cls1", "unshared_cls2", "cov_base_foreign_pre", "sd_d1_base_pre"),
                       by=.(datey, base, foreign, base_country, foreign_country)]
dt_export[]

write_dta(dt_export, "./output/betas_annuals.dta")
# ---------------------------------------------------------------------

